home *** CD-ROM | disk | FTP | other *** search
/ Enigma Amiga Life 109 / EnigmaAmiga109CD.iso / dalla rivista / host contacted / jikes.lha / jikes-1.11 / src / stream.cpp < prev    next >
C/C++ Source or Header  |  2000-01-16  |  39KB  |  1,111 lines

  1. // $Id: stream.cpp,v 1.20 1999/12/14 17:31:53 lord Exp $
  2. //
  3. // This software is subject to the terms of the IBM Jikes Compiler
  4. // License Agreement available at the following URL:
  5. // http://www.ibm.com/research/jikes.
  6. // Copyright (C) 1996, 1998, International Business Machines Corporation
  7. // and others.  All Rights Reserved.
  8. // You must accept the terms of that agreement to use this software.
  9. //
  10. #include "config.h"
  11. #include <ctype.h>
  12. #include "stream.h"
  13. #include "code.h"
  14. #include "zip.h"
  15. #include "symbol.h"
  16. #include "control.h"
  17. #include "semantic.h"
  18.  
  19. #ifdef HAVE_LIB_ICU_UC
  20. # include <ucnv.h>
  21. #endif
  22.  
  23. wchar_t *LexStream::KeywordName(int kind)
  24. {
  25.     switch(kind)
  26.     {
  27.         case TK_abstract:     return StringConstant::US_abstract; break;
  28.         case TK_boolean:      return StringConstant::US_boolean;  break;
  29.         case TK_break:        return StringConstant::US_break;    break;
  30.         case TK_byte:         return StringConstant::US_byte;     break;
  31.         case TK_case:         return StringConstant::US_case; break;
  32.         case TK_catch:        return StringConstant::US_catch; break;
  33.         case TK_char:         return StringConstant::US_char; break;
  34.         case TK_class:        return StringConstant::US_class; break;
  35.         case TK_const:        return StringConstant::US_const; break;
  36.         case TK_continue:     return StringConstant::US_continue; break;
  37.         case TK_default:      return StringConstant::US_default; break;
  38.         case TK_do:           return StringConstant::US_do; break;
  39.         case TK_double:       return StringConstant::US_double; break;
  40.         case TK_else:         return StringConstant::US_else; break;
  41.         case TK_extends:      return StringConstant::US_extends; break;
  42.         case TK_false:        return StringConstant::US_false; break;
  43.         case TK_final:        return StringConstant::US_final; break;
  44.         case TK_finally:      return StringConstant::US_finally; break;
  45.         case TK_float:        return StringConstant::US_float; break;
  46.         case TK_for:          return StringConstant::US_for; break;
  47.         case TK_goto:         return StringConstant::US_goto; break;
  48.         case TK_if:           return StringConstant::US_if; break;
  49.         case TK_implements:   return StringConstant::US_implements; break;
  50.         case TK_import:       return StringConstant::US_import; break;
  51.         case TK_instanceof:   return StringConstant::US_instanceof; break;
  52.         case TK_int:          return StringConstant::US_int; break;
  53.         case TK_interface:    return StringConstant::US_interface; break;
  54.         case TK_long:         return StringConstant::US_long; break;
  55.         case TK_native:       return StringConstant::US_native; break;
  56.         case TK_new:          return StringConstant::US_new; break;
  57.         case TK_null:         return StringConstant::US_null; break;
  58.         case TK_package:      return StringConstant::US_package; break;
  59.         case TK_private:      return StringConstant::US_private; break;
  60.         case TK_protected:    return StringConstant::US_protected; break;
  61.         case TK_public:       return StringConstant::US_public; break;
  62.         case TK_return:       return StringConstant::US_return; break;
  63.         case TK_short:        return StringConstant::US_short; break;
  64.         case TK_static:       return StringConstant::US_static; break;
  65.         case TK_strictfp:     return StringConstant::US_strictfp; break;
  66.         case TK_super:        return StringConstant::US_super; break;
  67.         case TK_switch:       return StringConstant::US_switch; break;
  68.         case TK_synchronized: return StringConstant::US_synchronized; break;
  69.         case TK_this:         return StringConstant::US_this; break;
  70.         case TK_throw:        return StringConstant::US_throw; break;
  71.         case TK_throws:       return StringConstant::US_throws; break;
  72.         case TK_transient:    return StringConstant::US_transient; break;
  73.         case TK_true:         return StringConstant::US_true; break;
  74.         case TK_try:          return StringConstant::US_try; break;
  75.         case TK_void:         return StringConstant::US_void; break;
  76.         case TK_volatile:     return StringConstant::US_volatile; break;
  77.         case TK_while:        return StringConstant::US_while; break;
  78.  
  79.         case TK_PLUS_PLUS:                  return StringConstant::US_PLUS_PLUS; break;
  80.         case TK_MINUS_MINUS:                return StringConstant::US_MINUS_MINUS; break;
  81.         case TK_EQUAL_EQUAL:                return StringConstant::US_EQUAL_EQUAL; break;
  82.         case TK_LESS_EQUAL:                 return StringConstant::US_LESS_EQUAL; break;
  83.         case TK_GREATER_EQUAL:              return StringConstant::US_GREATER_EQUAL; break;
  84.         case TK_NOT_EQUAL:                  return StringConstant::US_NOT_EQUAL; break;
  85.         case TK_LEFT_SHIFT:                 return StringConstant::US_LEFT_SHIFT; break;
  86.         case TK_RIGHT_SHIFT:                return StringConstant::US_RIGHT_SHIFT; break;
  87.         case TK_UNSIGNED_RIGHT_SHIFT:       return StringConstant::US_UNSIGNED_RIGHT_SHIFT; break;
  88.         case TK_PLUS_EQUAL:                 return StringConstant::US_PLUS_EQUAL; break;
  89.         case TK_MINUS_EQUAL:                return StringConstant::US_MINUS_EQUAL; break;
  90.         case TK_MULTIPLY_EQUAL:             return StringConstant::US_MULTIPLY_EQUAL; break;
  91.         case TK_DIVIDE_EQUAL:               return StringConstant::US_DIVIDE_EQUAL; break;
  92.         case TK_AND_EQUAL:                  return StringConstant::US_AND_EQUAL; break;
  93.         case TK_OR_EQUAL:                   return StringConstant::US_OR_EQUAL; break;
  94.         case TK_XOR_EQUAL:                  return StringConstant::US_XOR_EQUAL; break;
  95.         case TK_REMAINDER_EQUAL:            return StringConstant::US_REMAINDER_EQUAL; break;
  96.         case TK_LEFT_SHIFT_EQUAL:           return StringConstant::US_LEFT_SHIFT_EQUAL; break;
  97.         case TK_RIGHT_SHIFT_EQUAL:          return StringConstant::US_RIGHT_SHIFT_EQUAL; break;
  98.         case TK_UNSIGNED_RIGHT_SHIFT_EQUAL: return StringConstant::US_UNSIGNED_RIGHT_SHIFT_EQUAL; break;
  99.         case TK_OR_OR:                      return StringConstant::US_OR_OR; break;
  100.         case TK_AND_AND:                    return StringConstant::US_AND_AND; break;
  101.  
  102.         case TK_PLUS:                       return StringConstant::US_PLUS; break;
  103.         case TK_MINUS:                      return StringConstant::US_MINUS; break;
  104.         case TK_NOT:                        return StringConstant::US_NOT; break;
  105.         case TK_REMAINDER:                  return StringConstant::US_REMAINDER; break;
  106.         case TK_XOR:                        return StringConstant::US_XOR; break;
  107.         case TK_AND:                        return StringConstant::US_AND; break;
  108.         case TK_MULTIPLY:                   return StringConstant::US_MULTIPLY; break;
  109.         case TK_OR:                         return StringConstant::US_OR; break;
  110.         case TK_TWIDDLE:                    return StringConstant::US_TWIDDLE; break;
  111.         case TK_DIVIDE:                     return StringConstant::US_DIVIDE; break;
  112.         case TK_GREATER:                    return StringConstant::US_GREATER; break;
  113.         case TK_LESS:                       return StringConstant::US_LESS; break;
  114.         case TK_LPAREN:                     return StringConstant::US_LPAREN; break;
  115.         case TK_RPAREN:                     return StringConstant::US_RPAREN; break;
  116.         case TK_LBRACE:                     return StringConstant::US_LBRACE; break;
  117.         case TK_RBRACE:                     return StringConstant::US_RBRACE; break;
  118.         case TK_LBRACKET:                   return StringConstant::US_LBRACKET; break;
  119.         case TK_RBRACKET:                   return StringConstant::US_RBRACKET; break;
  120.         case TK_SEMICOLON:                  return StringConstant::US_SEMICOLON; break;
  121.         case TK_QUESTION:                   return StringConstant::US_QUESTION; break;
  122.         case TK_COLON:                      return StringConstant::US_COLON; break;
  123.         case TK_COMMA:                      return StringConstant::US_COMMA; break;
  124.         case TK_DOT:                        return StringConstant::US_DOT; break;
  125.         case TK_EQUAL:                      return StringConstant::US_EQUAL; break;
  126.         case TK_EOF:                        return StringConstant::US_EOF; break;
  127.         default:                            break;
  128.     }
  129.  
  130.     return StringConstant::US_EMPTY;
  131. }
  132.  
  133.  
  134. LexStream::~LexStream()
  135. {
  136. #ifdef TEST
  137.     control.line_count += (file_read * (line_location.Length() - 3));
  138. #endif
  139.  
  140.     DestroyInput();
  141.  
  142.     delete [] columns;
  143.     delete [] comment_buffer;
  144.     comment_buffer = NULL;
  145. }
  146.  
  147.  
  148. //
  149. //
  150. //
  151. ::LiteralSymbol *LexStream::LiteralSymbol(TokenIndex i)
  152. {
  153.     Symbol *symbol = tokens[i].additional_info.symbol;
  154.     return (symbol && (Kind(i) != TK_LBRACE) ? symbol -> LiteralCast() : (::LiteralSymbol *) NULL);
  155. }
  156.  
  157.  
  158. //
  159. //
  160. //
  161. ::NameSymbol *LexStream::NameSymbol(TokenIndex i)
  162. {
  163.     Symbol *symbol = tokens[i].additional_info.symbol;
  164.     return (symbol && (Kind(i) != TK_LBRACE) ? symbol -> NameCast() : (::NameSymbol *) NULL);
  165. }
  166.  
  167.  
  168. //
  169. // Name of input file where the token appeared.
  170. //
  171. char *LexStream::FileName() { return file_symbol -> FileName(); }
  172. size_t LexStream::FileNameLength() { return file_symbol -> FileNameLength(); }
  173.  
  174.  
  175. void LexStream::InitializeColumns()
  176. {
  177.     if (! columns)
  178.     {
  179.         columns = new unsigned short[token_stream.Length()];
  180.  
  181.         int start = 0,
  182.             k = 1;
  183.  
  184.         for (size_t i = 0; i < input_buffer_length; i++)
  185.         {
  186.             if (Code::IsNewline(input_buffer[i]))
  187.                 start = i;
  188.             else
  189.             {
  190.                 if (input_buffer[i] == U_HORIZONTAL_TAB)
  191.                 {
  192.                     int offset = (i - start) - 1;
  193.                     start -= ((Tab::TabSize() - 1) - offset % Tab::TabSize());
  194.                 }
  195.                 else if (tokens[k].Location() == i)
  196.                 {
  197.                     int col = i - start;
  198.                     columns[k++] = (col < USHRT_MAX ? col : 0);
  199.                 }
  200.             }
  201.         }
  202.     }
  203.  
  204.     return;
  205. }
  206.  
  207.  
  208. //
  209. //
  210. //
  211. void LexStream::CompressSpace()
  212. {
  213.     tokens = token_stream.Array();
  214.     if (control.option.dump_errors)
  215.         InitializeColumns();
  216.     comments = comment_stream.Array();
  217.     locations = line_location.Array();
  218.     types = type_index.Array();
  219.  
  220.     return;
  221. }
  222.  
  223.  
  224. //
  225. // Find and return the index of the first comment that immediately
  226. // follows tok. Return 0 if there is not a comment that immediately
  227. // follows tok.
  228. //
  229. LexStream::CommentIndex LexStream::FirstComment(TokenIndex tok)
  230. {
  231.     unsigned location = Location(tok);
  232.     int lo = 0,
  233.         hi = comment_stream.Length() - 1,
  234.         i = 0;
  235.  
  236.     if (lo < hi)
  237.     {
  238.         do
  239.         {
  240.             int mid = (lo + hi) / 2;
  241.  
  242.             if (comment_stream[mid].location < location)
  243.                  lo = mid + 1;
  244.             else hi = mid - 1;
  245.         } while (lo < hi);
  246.  
  247.         //
  248.         // at this stage lo == hi
  249.         //
  250.         i = (comment_stream[lo].location > location ? lo : lo + 1);
  251.     }
  252.  
  253.     return (i < comment_stream.Length() && comment_stream[i].previous_token == tok ? i : 0);
  254. }
  255.  
  256.  
  257. unsigned LexStream::FindLine(unsigned location)
  258. {
  259.     int lo = 0,
  260.         hi = line_location.Length() - 1;
  261.  
  262. assert(locations);
  263.     //
  264.     // we can place the exit test at the bottom of the loop
  265.     // since the line_location array will always contain at least
  266.     // one element.
  267.     //
  268.     do
  269.     {
  270.         int mid = (lo + hi) / 2;
  271.  
  272.         if (locations[mid] == location)
  273.             return mid;
  274.         if (locations[mid] < location)
  275.              lo = mid + 1;
  276.         else hi = mid - 1;
  277.     } while (lo < hi);
  278.  
  279.     return (locations[lo] > location ? lo - 1 : lo);
  280. }
  281.  
  282.  
  283. void LexStream::ReadInput()
  284. {
  285.     if (file_symbol -> buffer)
  286.     {
  287.         ProcessInput(file_symbol -> buffer, strlen(file_symbol -> buffer));
  288.     }
  289.     else if (file_symbol -> IsZip()) {
  290.         ZipFile *zipfile = new ZipFile(file_symbol);
  291.  
  292.         if (zipfile -> Buffer() == NULL)
  293.         {
  294.             fprintf(stderr, "chaos: Don\'t know how to process compressed (\".java\") source in a zip file\n");
  295.             assert(false);
  296.         }
  297.         else if (! file_symbol -> lex_stream) // Once the zip file is loaded, it never changes. So, we only read it the first time
  298.         {
  299.             file_symbol -> lex_stream = this;
  300.             ProcessInput(zipfile -> Buffer(), file_symbol -> uncompressed_size);
  301.         }
  302.         delete zipfile;
  303.     }
  304.     else
  305.     {
  306.         struct stat status;
  307.         ::SystemStat(FileName(), &status);
  308.  
  309.         file_symbol -> mtime = status.st_mtime; // actual time stamp of file read
  310.         file_symbol -> lex_stream = this;
  311.  
  312. #if defined(UNIX_FILE_SYSTEM) || defined(AMIGAOS_FILE_SYSTEM)
  313.         FILE *srcfile = ::SystemFopen(FileName(), "r");
  314.         if (srcfile != NULL)
  315.         {
  316.             char *buffer = new char[status.st_size];
  317.             size_t file_size = ::SystemFread(buffer, sizeof(char), status.st_size, srcfile);
  318.             fclose(srcfile);
  319.             ProcessInput(buffer, file_size);
  320.             delete [] buffer;
  321.         }
  322. #elif defined(WIN32_FILE_SYSTEM)
  323. #include <windows.h>
  324.         HANDLE srcfile = CreateFile(FileName(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL);
  325.         if (srcfile != INVALID_HANDLE_VALUE)
  326.         {
  327.             HANDLE mapfile = CreateFileMapping(srcfile, NULL, PAGE_READONLY, 0, 0, NULL);
  328.             if (mapfile != INVALID_HANDLE_VALUE)
  329.             {
  330.                 char *buffer = (char *) MapViewOfFile(mapfile, FILE_MAP_READ, 0, 0, 0);
  331.                 DWORD file_size = GetFileSize(srcfile, NULL);
  332.                 ProcessInput(buffer, file_size);
  333.                 if (buffer)
  334.                     UnmapViewOfFile(buffer);
  335.                 CloseHandle(mapfile);
  336.             }
  337.  
  338.             CloseHandle(srcfile);
  339.         }
  340. #endif
  341.     }
  342.  
  343.     initial_reading_of_input = false;
  344.  
  345.     return;
  346. }
  347.  
  348. void LexStream::RereadInput()
  349. {
  350.     if (input_buffer) // if input already available, do nothing
  351.         ;
  352. #ifdef TEST
  353.     else if (file_symbol -> buffer)
  354.     {
  355.       fprintf(stderr, "chaos: Don\'t know how to RereadInput a buffer\n");
  356.       assert(false);
  357.     }
  358. #endif
  359.     else if (file_symbol -> IsZip())
  360.     {
  361.         ZipFile *zipfile = new ZipFile(file_symbol);
  362.  
  363.         if (zipfile -> Buffer() == NULL)
  364.         {
  365.             fprintf(stderr, "chaos: Don\'t know how to process compressed (\".java\") source in a zip file\n");
  366.             assert(false);
  367.         }
  368.         else ProcessInput(zipfile -> Buffer(), file_symbol -> uncompressed_size);
  369.         delete zipfile;
  370.     }
  371.     else
  372.     {
  373.         struct stat status;
  374.         ::SystemStat(FileName(), &status);
  375.  
  376.         if (status.st_mtime == file_symbol -> mtime)
  377.         {
  378. #if defined(UNIX_FILE_SYSTEM) || defined(AMIGAOS_FILE_SYSTEM)
  379.             FILE *srcfile = ::SystemFopen(FileName(), "r");
  380.             if (srcfile != NULL)
  381.             {
  382.                 char *buffer = new char[status.st_size];
  383.                 size_t file_size = ::SystemFread(buffer, sizeof(char), status.st_size, srcfile);
  384.                 fclose(srcfile);
  385.                 ProcessInput(buffer, file_size);
  386.                 delete [] buffer;
  387.             }
  388. #elif defined(WIN32_FILE_SYSTEM)
  389.             HANDLE srcfile = CreateFile(FileName(), GENERIC_READ, FILE_SHARE_READ,
  390.                                         NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL);
  391.             if (srcfile != INVALID_HANDLE_VALUE)
  392.             {
  393.                 HANDLE mapfile = CreateFileMapping(srcfile, NULL, PAGE_READONLY, 0, 0, NULL);
  394.                 if (mapfile != INVALID_HANDLE_VALUE)
  395.                 {
  396.                     char *buffer = (char *) MapViewOfFile(mapfile, FILE_MAP_READ, 0, 0, 0);
  397.                     DWORD file_size = GetFileSize(srcfile, NULL);
  398.                     ProcessInput(buffer, file_size);
  399.                     if (buffer)
  400.                         UnmapViewOfFile(buffer);
  401.                     CloseHandle(mapfile);
  402.                 }
  403.  
  404.                 CloseHandle(srcfile);
  405.             }
  406. #endif
  407.         }
  408.         else
  409.         {
  410.             // TODO: File has changed !!!
  411.         }
  412.     }
  413.  
  414.     return;
  415. }
  416.  
  417.  
  418. int LexStream::hexvalue(wchar_t ch)
  419. {
  420.     switch(ch)
  421.     {
  422.     case U_a: case U_A:
  423.         return 10;
  424.     case U_b: case U_B:
  425.         return 11;
  426.     case U_c: case U_C:
  427.         return 12;
  428.     case U_d: case U_D:
  429.         return 13;
  430.     case U_e: case U_E:
  431.         return 14;
  432.     case U_f: case U_F:
  433.         return 15;
  434.     default:
  435.         return ch - U_0;
  436.     }
  437. }
  438.  
  439. //
  440. // Read filesize  characters from srcfile, convert them to unicode, and
  441. // store them in input_buffer.
  442. //
  443. void LexStream::ProcessInput(char *buffer, long filesize)
  444. {
  445. #ifdef HAVE_LIB_ICU_UC
  446.     LexStream::ProcessInputUnicode(buffer,filesize);
  447. #else
  448.     LexStream::ProcessInputAscii(buffer, filesize);
  449. #endif
  450. }
  451.  
  452. //
  453. // Read file_size Ascii characters from srcfile, convert them to unicode and
  454. // store them in input_buffer.
  455. //
  456. void LexStream::ProcessInputAscii(char *buffer, long filesize)
  457. {
  458. #ifdef TEST
  459.     file_read++;
  460. #endif
  461.  
  462.     input_buffer = new wchar_t[filesize + 4];
  463.     wchar_t *input_ptr = input_buffer;
  464.     *input_ptr = U_LINE_FEED; // add an initial '\n';
  465.  
  466.     if (buffer)
  467.     {
  468.         char *source_ptr = buffer,
  469.              *source_tail = &(buffer[filesize - 1]); // point to last character read from the file.
  470.  
  471.         while(source_ptr <= source_tail)
  472.         {
  473.             *(++input_ptr) = (*source_ptr++) & 0x00ff; // The (& 0x00ff) guarantees that quantity is copied as unsigned value
  474.  
  475.             if (*input_ptr == U_CARRIAGE_RETURN)
  476.             {
  477.                 *input_ptr = U_LINE_FEED;
  478.                 if (*source_ptr == U_LINE_FEED)
  479.                     source_ptr++;
  480.             }
  481.             else if (*input_ptr == U_BACKSLASH)
  482.             {
  483.                 if (*source_ptr == U_BACKSLASH)
  484.                     *(++input_ptr) = *source_ptr++;
  485.                 else if (*source_ptr == U_u)
  486.                 {
  487.                     char *u_ptr = source_ptr;
  488.  
  489.                     for (source_ptr++; source_ptr <= source_tail && *source_ptr == U_u; source_ptr++)
  490.                         ;
  491.                     *input_ptr = 0;
  492.                     int i;
  493.                     for (i = 0; source_ptr <= source_tail && isxdigit(*source_ptr) && i < 4; i++)
  494.                     {
  495.                         int multiplier[4] = {4096, 256, 16, 1};
  496.  
  497.                         char ch = *source_ptr++;
  498.                         switch(ch)
  499.                         {
  500.                             case U_a: case U_A:
  501.                                 *input_ptr += (10 * multiplier[i]);
  502.                                 break;
  503.                             case U_b: case U_B:
  504.                                 *input_ptr += (11 * multiplier[i]);
  505.                                 break;
  506.                             case U_c: case U_C:
  507.                                 *input_ptr += (12 * multiplier[i]);
  508.                                 break;
  509.                             case U_d: case U_D:
  510.                                 *input_ptr += (13 * multiplier[i]);
  511.                                 break;
  512.                             case U_e: case U_E:
  513.                                 *input_ptr += (14 * multiplier[i]);
  514.                                 break;
  515.                             case U_f: case U_F:
  516.                                 *input_ptr += (15 * multiplier[i]);
  517.                                 break;
  518.                             default:
  519.                                 *input_ptr += ((ch - U_0) * multiplier[i]);
  520.                         }
  521.                     }
  522.  
  523.                     if (i != 4)
  524.                     {
  525.                         if (initial_reading_of_input)
  526.                             bad_tokens.Next().Initialize(StreamError::INVALID_UNICODE_ESCAPE,
  527.                                                          (unsigned) (input_ptr - input_buffer),
  528.                                                          (unsigned) (input_ptr - input_buffer) + (source_ptr - u_ptr));
  529.  
  530.                         source_ptr = u_ptr;
  531.                         *input_ptr = U_BACKSLASH;
  532.                     }
  533.                     else if (*input_ptr == U_CARRIAGE_RETURN)
  534.                     {
  535.                         *input_ptr = U_LINE_FEED;
  536.                         if (*source_ptr == U_LINE_FEED)
  537.                             source_ptr++;
  538.                         else if (*source_ptr == U_BACKSLASH)
  539.                         {
  540.                             int i;
  541.                             for (i = 1; (source_ptr + i) <= source_tail && source_ptr[i] == U_u; i++)
  542.                                 ;
  543.                             if (i > 1 && (source_ptr + i + 3) <= source_tail
  544.                                       && source_ptr[i]     == U_0
  545.                                       && source_ptr[i + 1] == U_0
  546.                                       && source_ptr[i + 2] == U_0
  547.                                       && source_ptr[i + 3] == U_a) // the escape sequence of \n is \u000a
  548.                                 source_ptr += (i + 4);
  549.                         }
  550.                     }
  551.                 }
  552.             }
  553.         }
  554.  
  555.         //
  556.         // Remove all trailing spaces
  557.         //
  558.         while((input_ptr > input_buffer) && Code::IsSpace(*input_ptr))
  559.             input_ptr--;
  560.     }
  561.  
  562.     //
  563.     // If the very last character is not CTL_Z then add CTL_Z
  564.     //
  565.     if (*input_ptr != U_CTL_Z)
  566.     {
  567.         if (*input_ptr != U_LINE_FEED)
  568.             *(++input_ptr) = U_LINE_FEED; // if the last character is not end-of-line, add end-of-line
  569.         *(++input_ptr) = U_CTL_Z;         // Mark end-of-file
  570.     }
  571.     *(++input_ptr) = U_NULL;              // add gate
  572.  
  573.     input_buffer_length = input_ptr - input_buffer;
  574.  
  575.     return;
  576. }
  577.  
  578. #ifdef HAVE_LIB_ICU_UC
  579. //
  580. // Read filesize characters from buffer, convert them to unicode (through the
  581. // ICU converter when one is set in the options), and store them in input_buffer.
  582. //
  583. void LexStream::ProcessInputUnicode(char *buffer, long filesize)
  584. {
  585. #ifdef TEST
  586.     file_read++;
  587. #endif
  588.  
  589. #ifdef HAVE_LIB_ICU_UC
  590.     input_buffer       = new wchar_t[filesize + 4 + 2];
  591.     wchar_t *input_tail = input_buffer + filesize;
  592. #else
  593.     input_buffer = new wchar_t[filesize + 4];
  594. #endif
  595.     
  596.     wchar_t *input_ptr = input_buffer;
  597.     *input_ptr = U_LINE_FEED; // add an initial '\n';
  598.     
  599.     if(buffer)
  600.     {
  601.         int      escape_value;
  602.         wchar_t *escape_ptr;
  603.         const char *source_ptr = buffer,
  604.             *source_tail = &(buffer[filesize - 1]); // point to last character read from the file.
  605.         
  606.         UnicodeLexerState saved_state;
  607.         UnicodeLexerState state=RAW;
  608.         bool oncemore=false;
  609.  
  610. #ifdef HAVE_LIB_ICU_UC
  611.         UErrorCode err = U_ZERO_ERROR;
  612. #endif
  613.  
  614.         while((source_ptr <= source_tail) || oncemore)
  615.         {
  616. #ifdef HAVE_LIB_ICU_UC
  617.             // On each iteration we advance input_ptr maximun 2 postions.
  618.             // Here we check if we are close to the end of input_buffer
  619.             if(input_ptr>=input_tail)
  620.             {
  621.                 // If this happen, reallocate it with some more space.
  622.                 // This is very rare case, which could happen if
  623.                 // one code page character is represened by several 
  624.                 // unicode characters. One of exaples of such
  625.                 // situation is unicode "surrogates".
  626.                 //
  627.                 // If such reallocation will be required, it will indeed
  628.                 // slow down compilation a bit.
  629.                 size_t cursize = input_ptr-input_buffer;
  630.                 size_t newsize = cursize+cursize/10; // add 10%
  631.                 wchar_t *tmp   = new wchar_t[newsize]; 
  632.                 memcpy(tmp, input_buffer, newsize*sizeof(wchar_t));
  633.                 delete input_buffer;
  634.                 input_buffer = tmp;
  635.                 input_tail = input_buffer + newsize;
  636.                 input_ptr  = input_buffer+cursize;
  637.             }
  638. #endif
  639.             
  640.             wchar_t ch;
  641.             
  642.             if(!oncemore)
  643.             {
  644. #ifdef HAVE_LIB_ICU_UC
  645.                 if(control.option.converter)
  646.                     ch=ucnv_getNextUChar (control.option.converter,
  647.                                           &source_ptr,
  648.                                           source_tail,
  649.                                           &err);
  650.                 else
  651.                     ch=*source_ptr++;
  652.                 if(err!=U_ZERO_ERROR)
  653.                     break;
  654. #else
  655.                 ch=*source_ptr++;
  656. #endif
  657.             } else oncemore = false;
  658.       
  659.             switch(state)
  660.             {
  661.             case QUOTE:
  662.                 if(ch==U_BACKSLASH)
  663.                 {
  664.                     *(++input_ptr) = U_BACKSLASH;
  665.                     *(++input_ptr) = U_BACKSLASH;
  666.                     state          = RAW;
  667.                 } else if(ch==U_u)
  668.                 {
  669.                     escape_ptr = input_ptr;
  670.                     state      = UNICODE_ESCAPE;
  671.                 } else
  672.                 {
  673.                     *(++input_ptr )= U_BACKSLASH;
  674.                     state          = RAW;
  675.                     oncemore       = true;
  676.                 }
  677.                 break;
  678.             case UNICODE_ESCAPE:
  679.                 if(isxdigit(ch))
  680.                 {
  681.                     state=UNICODE_ESCAPE_DIGIT_0;
  682.                     escape_value=hexvalue(ch)*16*16*16;
  683.                 } else if(ch!=U_u)
  684.                 {
  685.                     if(initial_reading_of_input)
  686.                         bad_tokens.Next().Initialize(StreamError::INVALID_UNICODE_ESCAPE,
  687.                                                      (unsigned) (escape_ptr - input_buffer),
  688.                                                      (unsigned) (input_ptr - input_buffer));
  689.                 }
  690.                 break;
  691.             case UNICODE_ESCAPE_DIGIT_0:
  692.                 if(isxdigit(ch))
  693.                 {
  694.                     state=UNICODE_ESCAPE_DIGIT_1;
  695.                     escape_value+=hexvalue(ch)*16*16;
  696.                 } else  
  697.                 {
  698.                     if(initial_reading_of_input)
  699.                         bad_tokens.Next().Initialize(StreamError::INVALID_UNICODE_ESCAPE,
  700.                                                      (unsigned) (escape_ptr - input_buffer),
  701.                                                      (unsigned) (input_ptr - input_buffer));
  702.                 }
  703.                 break;
  704.             case UNICODE_ESCAPE_DIGIT_1:
  705.                 if(isxdigit(ch))
  706.                 {
  707.                     state=UNICODE_ESCAPE_DIGIT_2;
  708.                     escape_value+=hexvalue(ch)*16;
  709.                 } else  
  710.                 {
  711.                     if(initial_reading_of_input)
  712.                         bad_tokens.Next().Initialize(StreamError::INVALID_UNICODE_ESCAPE,
  713.                                                      (unsigned) (escape_ptr - input_buffer),
  714.                                                      (unsigned) (input_ptr - input_buffer));
  715.                 }
  716.                 break;
  717.             case UNICODE_ESCAPE_DIGIT_2:
  718.                 if(isxdigit(ch))
  719.                 {
  720.                     ch       = escape_value+hexvalue(ch);
  721.                     state    = saved_state;
  722.                     saved_state = UNICODE_ESCAPE_DIGIT_2;
  723.                     oncemore = true;
  724.                 } else  
  725.                 {
  726.                     if(initial_reading_of_input)
  727.                         bad_tokens.Next().Initialize(StreamError::INVALID_UNICODE_ESCAPE,
  728.                                                      (unsigned) (escape_ptr - input_buffer),
  729.                                                      (unsigned) (input_ptr - input_buffer));
  730.                 }
  731.                 break;
  732.             case CR:
  733.                 if(ch==U_LINE_FEED)
  734.                 {
  735.                     state = RAW;
  736.                 } else if(ch==U_BACKSLASH && saved_state != UNICODE_ESCAPE_DIGIT_2)
  737.                 {
  738.                     saved_state = CR;
  739.                     state       = QUOTE;
  740.                 } else
  741.                 {
  742.                     state = RAW;
  743.                     *(++input_ptr)=ch;                    
  744.                 }
  745.                 break;
  746.             case RAW:
  747.                 if(ch==U_BACKSLASH && saved_state != UNICODE_ESCAPE_DIGIT_2)
  748.                 {
  749.                     state       = QUOTE;
  750.                 } else if(ch == U_CARRIAGE_RETURN)
  751.                 {
  752.                     state = CR;
  753.                     *(++input_ptr) = U_LINE_FEED;
  754.                 } else
  755.                 {
  756.                     *(++input_ptr)=ch;                    
  757.                 }
  758.                 saved_state = RAW;
  759.                 break;
  760.             }
  761.         }
  762.     }
  763.  
  764.     //
  765.     // If the very last character is not CTL_Z then add CTL_Z
  766.     //
  767.     if (*input_ptr != U_CTL_Z)
  768.     {
  769.         if (*input_ptr != U_LINE_FEED)
  770.             *(++input_ptr) = U_LINE_FEED; // if the last character is not end-of-line, add end-of-line
  771.         *(++input_ptr) = U_CTL_Z;         // Mark end-of-file
  772.     }
  773.     *(++input_ptr) = U_NULL;              // add gate
  774.     
  775.     input_buffer_length = input_ptr - input_buffer;
  776.  
  777.     return;
  778. }
  779. #endif
  780.  
  781. //
  782. // This procedure uses a  quick sort algorithm to sort the stream ERRORS
  783. // by their locations.
  784. //
  785. void LexStream::SortMessages()
  786. {
  787.      int lower,
  788.          upper,
  789.          lostack[32],
  790.          histack[32];
  791.  
  792.      int top,
  793.          i,
  794.          j;
  795.      StreamError pivot,
  796.                  temp;
  797.  
  798.      top = 0;
  799.      lostack[top] = 0;
  800.      histack[top] = bad_tokens.Length() - 1;
  801.  
  802.      while(top >= 0)
  803.      {
  804.          lower = lostack[top];
  805.          upper = histack[top];
  806.          top--;
  807.  
  808.          while(upper > lower)
  809.          {
  810.              //
  811.              // The array is most-likely almost sorted. Therefore,
  812.              // we use the middle element as the pivot element.
  813.              //
  814.              i = (lower + upper) / 2;
  815.              pivot = bad_tokens[i];
  816.              bad_tokens[i] = bad_tokens[lower];
  817.  
  818.              //
  819.              // Split the array section indicated by LOWER and UPPER
  820.              // using ARRAY(LOWER) as the pivot.
  821.              //
  822.              i = lower;
  823.              for (j = lower + 1; j <= upper; j++)
  824.              {
  825.                  if (bad_tokens[j].start_location < pivot.start_location)
  826.                  {
  827.                      temp = bad_tokens[++i];
  828.                      bad_tokens[i] = bad_tokens[j];
  829.                      bad_tokens[j] = temp;
  830.                  }
  831.              }
  832.              bad_tokens[lower] = bad_tokens[i];
  833.              bad_tokens[i] = pivot;
  834.  
  835.              top++;
  836.              if ((i - lower) < (upper - i))
  837.              {
  838.                  lostack[top] = i + 1;
  839.                  histack[top] = upper;
  840.                  upper = i - 1;
  841.              }
  842.              else
  843.              {
  844.                  histack[top] = i - 1;
  845.                  lostack[top] = lower;
  846.                  lower = i + 1;
  847.              }
  848.          }
  849.      }
  850.  
  851.      return;
  852. }
  853.  
  854.  
  855. //
  856. //
  857. //
  858. void LexStream::PrintMessages()
  859. {
  860.     //
  861.     // If control.option.dump_errors then the error messages have already been printed
  862.     //
  863.     if (! control.option.dump_errors)
  864.     {
  865.         RereadInput();
  866.  
  867.         if (control.option.errors)
  868.         {
  869.             char *file_name = FileName();
  870.  
  871.             Coutput << "\nFound " << NumBadTokens() << " lexical error" << (NumBadTokens() == 1 ? "" : "s")
  872.                     << " in \""
  873.                     << file_name
  874.                     << "\":";
  875.  
  876.             if (! input_buffer)
  877.             {
  878.                 int length = FileNameLength();
  879.                 wchar_t *name = new wchar_t[length + 1];
  880.                 for (int i = 0; i < length; i++)
  881.                     name[i] = file_name[i];
  882.                 name[length] = U_NULL;
  883.                 control.system_semantic -> ReportSemError(SemanticError::CANNOT_REOPEN_FILE,
  884.                                                           0,
  885.                                                           0,
  886.                                                           name);
  887.                 delete [] name;
  888.             }
  889.             else
  890.             {
  891.                 for (int i = 0; i < bad_tokens.Length(); i++)
  892.                 {
  893.                     if (FindLine(bad_tokens[i].start_location) == FindLine(bad_tokens[i].end_location))
  894.                          PrintSmallSource(i);
  895.                     else PrintLargeSource(i);
  896.  
  897.                     Coutput << "\n*** Lexical Error: ";
  898.  
  899.                     PrintMessage(bad_tokens[i].kind);
  900.                 }
  901.             }
  902.         }
  903.         else
  904.         {
  905.             for (int i = 0; i < bad_tokens.Length(); i++)
  906.                 PrintEmacsMessage(i);
  907.         }
  908.  
  909.         DestroyInput();
  910.  
  911.         Coutput.flush();
  912.     }
  913.  
  914.     return;
  915. }
  916.  
  917.  
  918. //
  919. //
  920. //
  921. void LexStream::PrintEmacsMessage(int k)
  922. {
  923.     int left_line_no    = FindLine(bad_tokens[k].start_location),
  924.         left_column_no  = FindColumn(bad_tokens[k].start_location),
  925.         right_line_no   = FindLine(bad_tokens[k].end_location),
  926.         right_column_no = FindColumn(bad_tokens[k].end_location);
  927.  
  928.     Coutput << FileName()
  929.             << ':' << left_line_no  << ':' << left_column_no
  930.             << ':' << right_line_no << ':' << right_column_no
  931.             << ":\n    Lexical: ";
  932.  
  933.     PrintMessage(bad_tokens[k].kind);
  934.  
  935.     return;
  936. }
  937.  
  938.  
  939. //
  940. // This procedure is invoked to print a small message that may
  941. // only span a single line. The parameter k points to the error
  942. // message in the error structure.
  943. //
  944. void LexStream::PrintSmallSource(int k)
  945. {
  946.     int left_line_no = FindLine(bad_tokens[k].start_location);
  947.  
  948.     Coutput << "\n\n";
  949.     Coutput.width(6);
  950.     Coutput << left_line_no;
  951.     Coutput << ". ";
  952.     for (int i = this -> LineStart(left_line_no); i <= this -> LineEnd(left_line_no); i++)
  953.         Coutput << this -> InputBuffer()[i];
  954.  
  955.     int left_column_no = FindColumn(bad_tokens[k].start_location),
  956.         right_column_no = FindColumn(bad_tokens[k].end_location);
  957.  
  958.     Coutput.width(left_column_no + 7);
  959.     Coutput << "";
  960.     if (left_column_no == right_column_no)
  961.         Coutput << '^';
  962.     else
  963.     {
  964.         int offset = 0;
  965.         for (size_t i = bad_tokens[k].start_location; i <= bad_tokens[k].end_location; i++)
  966.         {
  967.             if (this -> InputBuffer()[i] > 0xff)
  968.                 offset += 5;
  969.         }
  970.  
  971.         Coutput << '<';
  972.         Coutput.width(right_column_no - left_column_no + offset);
  973.         Coutput.fill('-');
  974.         Coutput << ">";
  975.         Coutput.fill(' ');
  976.     }
  977.  
  978.     return;
  979. }
  980.  
  981.  
  982. //
  983. // This procedure is invoked to print a large message that may
  984. // span more than one line. The parameter message points to the
  985. // starting line. The parameter k points to the error message in
  986. // the error structure.
  987. //
  988. void LexStream::PrintLargeSource(int k)
  989. {
  990.     int left_line_no    = FindLine(bad_tokens[k].start_location),
  991.         left_column_no  = FindColumn(bad_tokens[k].start_location),
  992.         right_line_no   = FindLine(bad_tokens[k].end_location),
  993.         right_column_no = FindColumn(bad_tokens[k].end_location);
  994.  
  995.     if (left_line_no == right_line_no)
  996.     {
  997.         if (left_line_no == 0)
  998.             Coutput << "\n";
  999.         else
  1000.         {
  1001.             Coutput << "\n\n";
  1002.             Coutput.width(6);
  1003.             Coutput << left_line_no << ". ";
  1004.             for (int i = this -> LineStart(left_line_no); i <= this -> LineEnd(left_line_no); i++)
  1005.                 Coutput << this -> InputBuffer()[i];
  1006.  
  1007.             int offset = 0;
  1008.             for (size_t j = bad_tokens[k].start_location; j <= bad_tokens[k].end_location; j++)
  1009.             {
  1010.                 if (this -> InputBuffer()[j] > 0xff)
  1011.                     offset += 5;
  1012.             }
  1013.  
  1014.             Coutput.width(left_column_no + 8);
  1015.             Coutput << "<";
  1016.             Coutput.width(right_column_no - left_column_no + offset);
  1017.             Coutput.fill('-');
  1018.             Coutput << ">";
  1019.             Coutput.fill(' ');
  1020.         }
  1021.     }
  1022.     else
  1023.     {
  1024.         Coutput << "\n\n";
  1025.         Coutput.width(left_column_no + 8);
  1026.         Coutput << "<";
  1027.  
  1028.         int segment_size = Tab::Wcslen(input_buffer, bad_tokens[k].start_location,
  1029.                                                      LineEnd(FindLine(bad_tokens[k].start_location)));
  1030.         Coutput.width(segment_size - 1);
  1031.         Coutput.fill('-');
  1032.         Coutput << "\n";
  1033.         Coutput.fill(' ');
  1034.  
  1035.         Coutput.width(6);
  1036.         Coutput << left_line_no << ". ";
  1037.         for (int i = this -> LineStart(left_line_no); i <= this -> LineEnd(left_line_no); i++)
  1038.             Coutput << this -> InputBuffer()[i];
  1039.  
  1040.         if (right_line_no > left_line_no + 1)
  1041.         {
  1042.             Coutput.width(left_column_no + 7);
  1043.             Coutput << " ";
  1044.             Coutput << ". . .\n";
  1045.         }
  1046.  
  1047.         Coutput.width(6);
  1048.         Coutput << right_line_no << ". ";
  1049.  
  1050.         int offset = 0;
  1051.         for (int j = this -> LineStart(right_line_no); j <= this -> LineEnd(right_line_no); j++)
  1052.         {
  1053.             wchar_t c = this -> InputBuffer()[j];
  1054.             if (c > 0xff)
  1055.                 offset += 5;
  1056.             Coutput << c;
  1057.         }
  1058.  
  1059.         Coutput.width(8);
  1060.         Coutput << "";
  1061.         Coutput.width(right_column_no - 1 + offset);
  1062.         Coutput.fill('-');
  1063.         Coutput << ">";
  1064.         Coutput.fill(' ');
  1065.     }
  1066.  
  1067.     return;
  1068. }
  1069.  
  1070.  
  1071. void LexStream::PrintMessage(StreamError::StreamErrorKind kind)
  1072. {
  1073.     switch(kind)
  1074.     {
  1075.         case StreamError::BAD_TOKEN:
  1076.              Coutput << "Illegal token";
  1077.              break;
  1078.         case StreamError::BAD_OCTAL_CONSTANT:
  1079.              Coutput << "Octal constant contains invalid digit";
  1080.              break;
  1081.         case StreamError::EMPTY_CHARACTER_CONSTANT:
  1082.              Coutput << "Empty character constant";
  1083.              break;
  1084.         case StreamError::UNTERMINATED_CHARACTER_CONSTANT:
  1085.              Coutput << "Character constant not properly terminated";
  1086.              break;
  1087.         case StreamError::UNTERMINATED_COMMENT:
  1088.              Coutput << "Comment not properly terminated";
  1089.              break;
  1090.         case StreamError::UNTERMINATED_STRING_CONSTANT:
  1091.              Coutput << "String constant not properly terminated";
  1092.              break;
  1093.         case StreamError::INVALID_HEX_CONSTANT:
  1094.              Coutput << "The prefix 0x must be followed by at least one hex digit";
  1095.              break;
  1096.         case StreamError::INVALID_FLOATING_CONSTANT_EXPONENT:
  1097.              Coutput << "floating-constant exponent has no digit";
  1098.              break;
  1099.         case StreamError::INVALID_UNICODE_ESCAPE:
  1100.              Coutput << "Invalid unicode escape character";
  1101.              break;
  1102.         default:
  1103.              assert(false);
  1104.     }
  1105.  
  1106.     Coutput << '\n';
  1107.  
  1108.     return;
  1109. }
  1110.  
  1111.